# ---
# Fudenberg, Gao, & Liang:
# "How Flexible is that Functional Form? Quantifying the Restrictiveness of Theories"
# 
# Application 3: Microfinance Takeup

# "NetLinear.R"

# Research assistant: Stephanie Nam
# Date: 09/24/2023

# NOTE: This R script computes completeness and restrictiveness for the linear model

# ---

load('NetLinearData.RData')
sink('NetLinear.log')
##1. Initialize Simulations

set.seed(1)

M <- 10000 # number of simulations
# Generate M iterations of take-up rate across 43 villages
p_m <- data.frame(replicate(M, runif(43)))


## Restrictiveness

err_lin_rest <- data.frame()
err_naive_rest <- data.frame()
for(i in 1:M) {
  p_i <- unlist(p_m[i])
  err_naive <- var(p_i)*42/43
  err_naive_rest<- rbind(err_naive_rest, err_naive)
  
  reg1 <- lm(p_i ~ leader_eig)
  reg2 <- lm(p_i ~ leader_eig + leaders_deg)
  reg3 <- lm(p_i ~ leader_eig + leaders_deg + avg_deg)
  reg4 <- lm(p_i ~ leader_eig + leaders_deg + avg_deg + leaders_betweenness)
  reg5 <- lm(p_i ~ leader_eig + leaders_deg + avg_deg + leaders_betweenness + cluster_coeff) 
  reg6 <- lm(p_i ~ leader_eig + leaders_deg + avg_deg + leaders_betweenness + cluster_coeff + avg_path_length)  
  reg7 <- lm(p_i ~ leader_eig + leaders_deg + avg_deg + leaders_betweenness + cluster_coeff + avg_path_length + comp_count)
  reg8 <- lm(p_i ~ leader_eig + leaders_deg + avg_deg + leaders_betweenness + cluster_coeff + avg_path_length + comp_count + leaders_rate)
  
  err_lin <- c(deviance(reg1)/43,deviance(reg2)/43,deviance(reg3)/43,deviance(reg4)/43,deviance(reg5)/43,deviance(reg6)/43,deviance(reg7)/43,deviance(reg8)/43)
  err_lin_rest <- rbind(err_lin_rest, err_lin)
  
}
names(err_lin_rest) <- c("reg1", "reg2", "reg3", "reg4", "reg5", "reg6", "reg7", "reg8")

mean_err_lin_rest <- colMeans(err_lin_rest)
mean_err_naive_rest <- colMeans(err_naive_rest)

rest_lin <- mean_err_lin_rest / mean_err_naive_rest # Restrictiveness


var_err_lin_rest <- apply(err_lin_rest, 2, var)
var_err_naive_rest <- apply(err_naive_rest, 2, var)
mat_lin_rest <- data.matrix(err_lin_rest)
cov_err_rest <- rep(NA,8)
for (i in 1:8) {
  cov_err_rest[i] <- cov(mat_lin_rest[,i], err_naive_rest)
}

avar_rest_lin <- 1/ (mean_err_naive_rest^2) * (var_err_lin_rest - 2 * rest_lin * cov_err_rest + rest_lin^2 * var_err_naive_rest)
se_rest_lin <- sqrt(avar_rest_lin /M) # Standard erorrs for restrictiveness

REST_linear <- data.frame(rest_lin, se_rest_lin)



## Completeness

err_naive_comp <- var(mf_rate)*42/43
resid_naive_comp <- mf_rate - mean(mf_rate)
var_err_naive_comp <- var(resid_naive_comp^2)*42/43

reg1 <- lm(mf_rate ~ leader_eig)
reg2 <- lm(mf_rate ~ leader_eig + leaders_deg)
reg3 <- lm(mf_rate ~ leader_eig + leaders_deg + avg_deg)
reg4 <- lm(mf_rate ~ leader_eig + leaders_deg + avg_deg + leaders_betweenness)
reg5 <- lm(mf_rate ~ leader_eig + leaders_deg + avg_deg + leaders_betweenness + cluster_coeff) 
reg6 <- lm(mf_rate ~ leader_eig + leaders_deg + avg_deg + leaders_betweenness + cluster_coeff + avg_path_length)  
reg7 <- lm(mf_rate ~ leader_eig + leaders_deg + avg_deg + leaders_betweenness + cluster_coeff + avg_path_length + comp_count)
reg8 <- lm(mf_rate ~ leader_eig + leaders_deg + avg_deg + leaders_betweenness + cluster_coeff + avg_path_length + comp_count + leaders_rate)

err_lin_comp <- c(deviance(reg1)/43,deviance(reg2)/43,deviance(reg3)/43,deviance(reg4)/43,deviance(reg5)/43,deviance(reg6)/43,deviance(reg7)/43,deviance(reg8)/43)
comp_lin <- 1 - err_lin_comp/err_naive_comp # Completeness

R2_lin <- c(summary(reg1)$r.squared,
              summary(reg2)$r.squared,
              summary(reg3)$r.squared,
              summary(reg4)$r.squared,
              summary(reg5)$r.squared,
              summary(reg6)$r.squared,
              summary(reg7)$r.squared,
              summary(reg8)$r.squared)


resid_lin_comp <- data.frame(reg1$residuals,reg2$residuals,reg3$residuals,reg4$residuals,
                    reg5$residuals,reg6$residuals,reg7$residuals,reg8$residuals)

var_err_lin_comp <- 42/43 * c(var(reg1$residuals^2),
                              var(reg2$residuals^2),
                              var(reg3$residuals^2),
                              var(reg4$residuals^2),
                              var(reg5$residuals^2),
                              var(reg6$residuals^2),
                              var(reg7$residuals^2),
                              var(reg8$residuals^2))
                              
  
cov_err_comp <- rep(NA,8)
for (i in 1:8) {
  cov_err_comp[i] <- cov(resid_lin_comp[,i]^2, resid_naive_comp^2) * 42/43
}

avar_comp_lin <- 1/ (err_naive_comp^2) * (var_err_lin_comp - 2 * (1-comp_lin) * cov_err_comp + (1-comp_lin)^2 * var_err_naive_comp)
se_comp_lin <- sqrt(avar_comp_lin / 43) # Standard error for completeness

COMP_linear <- data.frame(comp_lin, se_comp_lin)

REST_linear
COMP_linear

save.image(file = "NetLinearResults.RData")

sink()

